# Libraries
library(tidyverse)
library(readxl)
library(ggforce)
library(knitr)
library(forcats)













# Encode the pitcher tier stored in ID as a numeric group indicator:
# "Great" -> 1, "Decent" -> 2, "Bad" -> 3. An ID outside these three labels
# (or a missing ID) gets NA. match() returns the position of each ID in the
# label vector; as.numeric() keeps the column a double.
pitcher_test <- pitchers %>%
  mutate(
    group_indicator = as.numeric(match(ID, c("Great", "Decent", "Bad")))
  )
# Correlation between spin rate and run expectancy added, computed only on
# rows where both values are present (rows missing either one are dropped
# before cor() is called).
pitcher_test %>%
  drop_na(release_spin_rate, run_exp_added) %>%
  with(cor(release_spin_rate, run_exp_added))
## [1] 0.01304765
# Correlation between pitch speed and run expectancy added, computed only on
# rows where both values are present (rows missing either one are dropped
# before cor() is called).
pitcher_test %>%
  drop_na(pitch_speed, run_exp_added) %>%
  with(cor(pitch_speed, run_exp_added))
## [1] -0.008914642
# Can batted ball type predict run expectancy?
# Fits a linear model of run_exp_added on the categorical bb_type and prints
# the fitted coefficients.
lm(formula = run_exp_added ~ bb_type, data = pitcher_test)
##
## Call:
## lm(formula = run_exp_added ~ bb_type, data = pitcher_test)
##
## Coefficients:
## (Intercept) bb_typeground_ball bb_typeline_drive bb_typepopup
## -0.1025 0.1706 -0.1393 0.3284
# Intercept = Fly Ball: fly_ball is the reference level (it has no coefficient
# of its own above), so the other coefficients are offsets relative to it.
# Pitchers held out of model fitting and used as the test set. Defined once so
# the training exclusion and the test inclusion below cannot drift apart.
holdout_pitchers <- c("Scherzer, Max", "Taillon, Jameson", "Berríos, José")

# Training fit: run_exp_added ~ bb_type on every pitcher except the held-out
# ones. `data = .` passes the filtered rows to lm() explicitly.
test_model <- pitcher_test %>%
  filter(!(player_name %in% holdout_pitchers)) %>%
  lm(run_exp_added ~ bb_type, data = .)

# Test set: rows for the held-out pitchers that have a recorded bb_type,
# reduced to the columns needed to compare predictions with observed values.
test_testdata <- pitcher_test %>%
  filter(
    player_name %in% holdout_pitchers,
    !is.na(bb_type)
  ) %>%
  select(ID, pitch_type, run_exp_added, bb_type)

# Attach the model's predictions to the held-out rows. The result is printed,
# not stored.
test_testdata %>%
  mutate(preds = predict(test_model, newdata = test_testdata))
## # A tibble: 1,442 × 5
## ID pitch_type run_exp_added bb_type preds
## <chr> <chr> <dbl> <chr> <dbl>
## 1 Great SL 0.207 ground_ball 0.0784
## 2 Great CU 0.221 ground_ball 0.0784
## 3 Great CH -1.03 fly_ball -0.111
## 4 Great FF 0.181 fly_ball -0.111
## 5 Great FF -1.66 fly_ball -0.111
## 6 Great SL -0.27 fly_ball -0.111
## 7 Great FF 0.206 ground_ball 0.0784
## 8 Great SL -0.183 ground_ball 0.0784
## 9 Great SL 0.406 ground_ball 0.0784
## 10 Great FC -0.752 ground_ball 0.0784
## # ℹ 1,432 more rows
# row.names = FALSE